#CRAN packages:
library(tidyverse)
## ── Attaching packages ──────────────────────────────── tidyverse 1.2.1 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.2
## ✓ tibble  3.0.1     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.6.2, GDAL 2.2.3, PROJ 4.9.3
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tidycensus)
library(ggExtra)
library(ggridges)
## 
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
## 
##     scale_discrete_manual
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.18.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(drc)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 'drc' has been loaded.
## Please cite R and 'drc' if used for a publication,
## for references type 'citation()' and 'citation('drc')'.
## 
## Attaching package: 'drc'
## The following objects are masked from 'package:stats':
## 
##     gaussian, getInitial
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-28. For overview type 'help("mgcv-package")'.
library(broom)
library(MASS)
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     anova.sarlm, as_dgRMatrix_listw, as_dsCMatrix_I, as_dsCMatrix_IrW,
##     as_dsTMatrix_listw, as.spam.listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
library(here)
## here() starts at /home/carrid08/COVID_19_admin_disparities
library(pdftools)
## Using poppler version 0.74.0
library(matrixStats)
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
library(egg)
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:egg':
## 
##     ggarrange
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
#### SESSION CONFIGURATIONS ####

here() # print the project root / current working directory
## [1] "/home/carrid08/COVID_19_admin_disparities"

# If not already set via an environment variable, cache MTA turnstile data
# within the working directory so repeated runs skip re-processing.
if (Sys.getenv("MTA_TURNSTILE_DATA_DIR") == "") {
  mta_dir <- here("data/mta_turnstile")
  if (!dir.exists(mta_dir)) dir.create(mta_dir, recursive = TRUE)
  Sys.setenv(MTA_TURNSTILE_DATA_DIR = mta_dir)
}

## R packages available from GitHub repositories via:
#remotes::install_github("justlab/Just_universal", ref = "78812f519da11502706a5061e7b8bc4812e5c3b5") 
#remotes::install_github("justlab/MTA_turnstile", ref = "6c8bd7690dfa6036bf991cb4504f42631e8f6756")
library(Just.universal) 
library(MTA.turnstile) # if not already set, this will process MTA data in a temporary directory
## MTA.turnstile data directory: /home/carrid08/COVID_19_admin_disparities/data/mta_turnstile

if (!dir.exists(here("figures"))) dir.create(here("figures"))

Sys.time() # print the start time
## [1] "2020-06-21 20:00:10 EDT"

## To generate census data, you must set an API key, which you can request here: https://api.census.gov/data/key_signup.html
#census_api_key("INSERT YOUR CENSUS API KEY HERE", install = TRUE) 
if (Sys.getenv("CENSUS_API_KEY") == "") stop("Census API Key Missing. Please see ?census_api_key()")

export.figs <- FALSE # change to TRUE if you would like to save out figures

# Data will default to a subfolder "data/" within the working directory
# unless 1. set by an environment variable:
data.root <- Sys.getenv("COVID_DATA")
# or 2. set with an alternative path here:
if (data.root == "") data.root <- here("data")
# Scalar `if` condition, so use the short-circuiting `&&` rather than `&`.
if (data.root == "data" && !dir.exists(data.root)) dir.create(here("data"))
print(paste("data being downloaded into directory", dQuote(data.root)))
## [1] "data being downloaded into directory \"/home/carrid08/COVID_19_admin_disparities/data\""
##### FUNCTIONS ####

# Read a CSV file and tag every row with the path it came from, so that
# several files can be row-bound later without losing their provenance.
#
# Args:
#   flnm: path to a CSV file readable by readr::read_csv().
# Returns the parsed tibble with an extra `filename` column.
read_w_filenames <- function(flnm) {
  contents <- read_csv(flnm)
  mutate(contents, filename = flnm)
}

# Compute WAIC and approximate (truncated importance-sampling, non-PSIS)
# LOO summaries from a fitted Stan model that stores a "log_lik" quantity.
#
# Args:
#   stanfit: an rstan fit object whose generated quantities include
#            log_lik (S posterior draws x n observations).
# Returns a named list of totals: waic, elpd_waic, p_waic, elpd_loo, p_loo.
extract_waic <- function (stanfit){
  log_lik <- rstan::extract(stanfit, "log_lik")$log_lik
  # Coerce log_lik to an S x n matrix, whether it came back as a plain
  # vector (one observation) or a higher-dimensional array.
  dim(log_lik) <- if (length(dim(log_lik)) == 1) 
    c(length(log_lik), 1)
  else c(dim(log_lik)[1], prod(dim(log_lik)[2:length(dim(log_lik))]))
  S <- nrow(log_lik)                 # number of posterior draws
  n <- ncol(log_lik)                 # number of observations
  lpd <- log(colMeans(exp(log_lik))) # pointwise log predictive density
  p_waic <- colVars(log_lik)         # effective number of parameters (WAIC)
  elpd_waic <- lpd - p_waic
  waic <- -2 * elpd_waic
  # Importance weights for LOO, normalized per observation and truncated
  # at sqrt(S) to regularize extreme weights.
  loo_weights_raw <- 1/exp(log_lik - max(log_lik))
  loo_weights_normalized <- loo_weights_raw/matrix(colMeans(loo_weights_raw), 
                                                   nrow = S, ncol = n, byrow = TRUE)
  loo_weights_regularized <- pmin(loo_weights_normalized, sqrt(S))
  elpd_loo <- log(colMeans(exp(log_lik) * loo_weights_regularized)/colMeans(loo_weights_regularized))
  p_loo <- lpd - elpd_loo
  pointwise <- cbind(waic, lpd, p_waic, elpd_waic, p_loo, elpd_loo)
  total <- colSums(pointwise)
  # NOTE: the original also computed se <- sqrt(n * colVars(pointwise)) but
  # never returned or used it; that dead computation has been removed.
  return(list(waic = total["waic"], elpd_waic = total["elpd_waic"], 
              p_waic = total["p_waic"], elpd_loo = total["elpd_loo"], 
              p_loo = total["p_loo"]))
}

# Recode each column named in `mix_name` into quantile-group integers.
#
# Args:
#   data: a data frame (or tibble) containing the columns in `mix_name`.
#   mix_name: character vector of column names to quantize. NOTE: the
#     self-referential default is preserved for interface compatibility;
#     it errors if the argument is omitted, so always pass it explicitly.
#   q: number of quantile groups (e.g. 4 = quartiles).
#   shift: if TRUE (default), groups are coded 0..(q-1) — the zero-based
#     coding used by the BWQS model — instead of 1..q.
# Returns `data` with the named columns replaced by their group codes.
quantile_split <- function (data, mix_name = mix_name, q, shift = TRUE) {
  for (i in seq_along(mix_name)) {
    dat_num <- as.numeric(unlist(data[, mix_name[i]]))
    # unique() guards cut() against duplicated break points when the
    # data contain heavy ties.
    grp <- cut(dat_num,
               breaks = unique(quantile(dat_num, probs = seq(0, 1, by = 1/q),
                                        na.rm = TRUE)),
               labels = FALSE, include.lowest = TRUE)
    # The original's two branches were identical except for this "- 1";
    # collapsing them removes the duplicated loop body.
    data[[mix_name[i]]] <- if (shift) grp - 1 else grp
  }
  return(data)
}

# Thin wrapper around Just.universal's download.update.meta() that pins the
# cache location to the global `data.root` chosen in the session config.
#
# Args:
#   url: source URL to fetch.
#   to:  file name to cache the download under.
#   f:   reader function applied to the cached file; its result is returned.
#   ...: forwarded to download.update.meta().
download <- function(url, to, f, ...) {
  download.update.meta(data.root, url, to, f, ...)
}


##### Load Data #####


# Get the PLUTO tax-lot dataset from
# https://www1.nyc.gov/site/planning/data-maps/open-data/dwn-pluto-mappluto.page
# The explicit col_character() declarations override read_csv's type
# guessing for sparsely populated columns (mostly NA in the leading rows).
Pluto = download(
    "https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/nyc_pluto_20v3_csv.zip",
    "pluto.zip",
    function(p)
        read_csv(unz(p, "pluto_20v3.csv"), col_types = cols(spdist2 = col_character(), 
                                                overlay2 = col_character(),
                                                zonedist4 = col_character())))

# Building footprint polygons for NYC structures, read straight out of the
# downloaded zip via GDAL's /vsizip/ virtual filesystem (no manual unzip).
Bldg_Footprints <- download(
  # https://data.cityofnewyork.us/Housing-Development/Building-Footprints/nqwf-w8eh
    "https://data.cityofnewyork.us/api/geospatial/nqwf-w8eh?method=export&format=Shapefile",
    "building_footprints.zip",
    function(p)
        st_read(paste0("/vsizip/", p)))
## Multiple layers are present in data source /vsizip//home/carrid08/COVID_19_admin_disparities/data/downloads/building_footprints.zip, reading layer `geo_export_df2b1794-2a22-4b1d-83a3-3e63b7b0516b'.
## Use `st_layers' to list all layer names and their type in a data source.
## Set the `layer' argument in `st_read' to read a particular layer.
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE, noBreaks. =
## FALSE, : automatically selected the first layer in a data source containing more
## than one.
## Reading layer `geo_export_df2b1794-2a22-4b1d-83a3-3e63b7b0516b' from data source `/vsizip//home/carrid08/COVID_19_admin_disparities/data/downloads/building_footprints.zip' using driver `ESRI Shapefile'
## Simple feature collection with 1085105 features and 15 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -74.2555 ymin: 40.49843 xmax: -73.70006 ymax: 40.91505
## CRS:            4326
# Borough membership for each zip code, scraped from the NYS cancer-registry
# appendix page. Each regex match captures one borough header cell plus the
# HTML that follows it (up to the next header cell or the end of the table);
# every 5-digit zip inside that chunk becomes one (boro, zip) row.
# NOTE(review): `dotall = T` — prefer TRUE over the reassignable alias T.
ZCTA_by_boro <- download(
    "https://www.health.ny.gov/statistics/cancer/registry/appendix/neighborhoods.htm",
    "uhf_neighborhoods.html",
    function(p)
       {# XML::readHTMLTable doesn't identify the columns correctly.
        x = str_match_all(read_file(p), regex(dotall = T, paste0(
            '<td headers="header1"[^>]+>\\s*(.+?)</td>',
            '(.+?)',
            '(?=<td headers="header1"|</table>)')))[[1]]
        do.call(rbind, lapply(1 : nrow(x), function(i)
            data.frame(boro = x[i, 2], zip = as.integer(
                str_extract_all(x[i, 3], "\\b\\d{5}\\b")[[1]]))))})

# Download the specific day of test results by ZCTA being used (May 7, 2020
# snapshot, pinned to a specific commit of the nychealth repo). `identity`
# as the reader means the cached value is the raw download, not parsed data.
ZCTA_test_download <- download(
  "https://raw.githubusercontent.com/nychealth/coronavirus-data/6d7c4a94d6472a9ffc061166d099a4e5d89cd3e3/tests-by-zcta.csv",
  "2020-05-07_tests-by-zcta.csv",
  identity
)

# Parse the testing data: read each file, tag rows with their source
# filename, and recover the snapshot date from the "YYYY-MM-DD" prefix
# embedded in that filename. (Presumably ZCTA_test_download holds the cached
# file path(s), since `identity` was used as the reader — TODO confirm.)
ZCTA_test_series <- ZCTA_test_download %>% 
  map_df(~read_w_filenames(.)) %>%
  mutate(date = as.Date(str_extract(filename, "[:digit:]{4}-[:digit:]{2}-[:digit:]{2}"))) %>%
  dplyr::select(-filename)
## Parsed with column specification:
## cols(
##   MODZCTA = col_double(),
##   Positive = col_double(),
##   Total = col_double(),
##   zcta_cum.perc_pos = col_double()
## )
# All modified-ZCTA codes present in the NYC testing data, as characters;
# used below to filter the (national) ACS pulls down to NYC.
ZCTAs_in_NYC <- as.character(unique(ZCTA_test_series$MODZCTA))

## Subway ridership data (neighborhood-level, from the MTA.turnstile package)
Subway_ridership_by_UHF <- relative.subway.usage(2020L, "nhood")

# UHF (United Hospital Fund) neighborhood definitions by zip code, scraped
# from page 2 of the reference PDF. Each regex match captures the UHF code,
# the neighborhood name, and a comma-separated run of zip codes, which is
# then unpacked into one (code, name, zip) row per zip.
# NOTE: the original had a redundant duplicated assignment
# (`UHF_ZipCodes <- UHF_ZipCodes <- download(...)`); collapsed to one.
UHF_ZipCodes <- download(
    "http://www.infoshare.org/misc/UHF.pdf",
    "uhf_zips.pdf",
    function(p)
       {x = str_match_all(pdf_text(p)[2],
            "(\\d+)\\s+(\\S.+?\\S)\\s*([0-9,]+)")[[1]]
        # seq_len() is safe if the regex ever matched nothing (1:0 would not be).
        do.call(rbind, lapply(seq_len(nrow(x)), function(i)
            data.frame(code = x[i, 2], name = x[i, 3], zip = as.integer(
                str_extract_all(x[i, 4], "\\b\\d{5}\\b")[[1]]))))})

# UHF neighborhood shapefile (42-district DOHMH version), read from inside
# the zip via /vsizip/.
UHF_shp <- download(
    "https://www1.nyc.gov/assets/doh/downloads/zip/uhf42_dohmh_2009.zip",
    "nyc_uhf_nhoods_shapefile.zip",
    function(p) read_sf(paste0("/vsizip/", p, "/UHF_42_DOHMH_2009")))

# NYC borough boundaries from NYC Open Data, reprojected to EPSG:2263
# (NAD83 / New York Long Island state plane, US feet) to match other layers.
NYC_basemap_shp <- download(
  "https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=Shapefile",
  "Borough_Boundaries.zip",
  function(p){
    unzip(p, exdir = file.path(data.root, "downloads"))
    # open data platform generates a random UUID for every download
    # NOTE(review): if downloads/ already holds another geo_export_*.shp
    # (e.g. from a previous run), this glob matches more than one file and
    # st_read() would receive a vector of paths — consider cleaning up first.
    ufile = list.files(file.path(data.root, "downloads"), pattern = "geo_export_.*\\.shp", full.names = TRUE)
    st_read(ufile, stringsAsFactors = FALSE, quiet = TRUE) %>% st_transform(., crs = 2263)
  } 
)

# DOHMH modified-ZCTA (MODZCTA) shapefile — the geography NYC reports
# COVID counts by.
MODZCTA_NYC_shp <- download(
  "https://data.cityofnewyork.us/api/geospatial/pri4-ifjk?method=export&format=Shapefile",
  "Modified Zip Code Tabulation Areas (MODZCTA).zip", 
  function(p) read_sf(paste0("/vsizip/", p))
)

# Food outlets: NYS retail food store licenses (statewide; filtered down to
# NYC counties and supermarket-like stores later in the script).
food_retail <- download(
    "https://data.ny.gov/api/views/9a8c-vfzj/rows.csv",
    "retail_food_stores.csv",
    read_csv)
## Parsed with column specification:
## cols(
##   County = col_character(),
##   `License Number` = col_character(),
##   `Operation Type` = col_character(),
##   `Establishment Type` = col_character(),
##   `Entity Name` = col_character(),
##   `DBA Name` = col_character(),
##   `Street Number` = col_character(),
##   `Street Name` = col_character(),
##   `Address Line 2` = col_logical(),
##   `Address Line 3` = col_logical(),
##   City = col_character(),
##   State = col_character(),
##   `Zip Code` = col_double(),
##   `Square Footage` = col_double(),
##   Location = col_character()
## )
# Download deaths by ZCTA as of May 23rd (pinned to a specific commit of
# the nychealth repo for reproducibility).
deaths_by23May2020_by_zcta <- download(
  "https://raw.githubusercontent.com/nychealth/coronavirus-data/8d88b2c06cf6b65676d58b28979731faa10c193c/data-by-modzcta.csv",
  "2020-05-23_data-by-modzcta.csv",
  read_csv
)
## Parsed with column specification:
## cols(
##   MODIFIED_ZCTA = col_double(),
##   NEIGHBORHOOD_NAME = col_character(),
##   BOROUGH_GROUP = col_character(),
##   COVID_CASE_COUNT = col_double(),
##   COVID_CASE_RATE = col_double(),
##   POP_DENOMINATOR = col_double(),
##   COVID_DEATH_COUNT = col_double(),
##   COVID_DEATH_RATE = col_double(),
##   PERCENT_POSITIVE = col_double()
## )
# Download MODZCTA-to-ZCTA crosswalk, current version from the repo.
# NOTE(review): unlike the two downloads above this one tracks `master`
# rather than a pinned commit, so it can change between runs.
modzcta_to_zcta <- download(
  "https://raw.githubusercontent.com/nychealth/coronavirus-data/master/Geography-resources/ZCTA-to-MODZCTA.csv",
  "ZCTA-to-MODZCTA.csv",
  read_csv
)
## Parsed with column specification:
## cols(
##   ZCTA = col_double(),
##   MODZCTA = col_double()
## )
## Lookup vectors/table for reconciling the different geographic naming
## schemes used across data sources: county name, borough name, and the
## Census-style "X County" form.
NYC_counties1 <- c("Bronx","Kings","Queens","New York","Richmond")
NYC_counties1_full <- c("Bronx County","Kings County","Queens County","New York County","Richmond County")
NYC_boro_county_match <- tibble(
  County = NYC_counties1,
  boro = c("Bronx","Brooklyn","Queens","Manhattan","Staten Island"),
  full_county = NYC_counties1_full)

# Path to the Stan program (negative-binomial BWQS model with covariates)
# kept in code/ alongside this analysis script.
BWQS_stan_model <- here("code", "nb_bwqs_cov.stan") 



#### Census Data Collection and Cleaning ####

# Pull 2014-2018 5-year ACS estimates for every ZCTA (national; filtered to
# NYC below), in wide format so each variable becomes an "E"/"M"
# (estimate/margin-of-error) column pair.
ACS_Data <- get_acs(geography = "zcta", 
        variables = c(medincome = "B19013_001",
                      total_pop1 = "B01003_001",
                      fpl_100 = "B06012_002", 
                      fpl_100to150 = "B06012_003",
                      median_rent = "B25031_001",
                      total_hholds1 = "B22003_001",
                      hholds_snap = "B22003_002",
                      over16total_industry1 = "C24050_001",
                      ag_industry = "C24050_002",
                      construct_industry = "C24050_003",
                      manufact_industry = "C24050_004",
                      wholesaletrade_industry = "C24050_005",
                      retail_industry = "C24050_006",
                      transpo_and_utilities_industry = "C24050_007",
                      information_industry = "C24050_008",
                      finance_and_realestate_industry = "C24050_009",
                      science_mngmt_admin_industry = "C24050_010",
                      edu_health_socasst_industry = "C24050_011",
                      arts_entertain_rec_accomodate_industry = "C24050_012",
                      othsvcs_industry = "C24050_013",
                      publicadmin_industry = "C24050_014",
                      total_commute1 = "B08301_001",
                      drove_commute = "B08301_002",
                      pubtrans_bus_commute = "B08301_011",
                      pubtrans_subway_commute = "B08301_013",
                      pubtrans_railroad_commute = "B08301_013", # NOTE(review): same code as subway above; in table B08301 railroad is _014 — confirm intended
                      pubtrans_ferry_commute = "B08301_015",
                      taxi_commute = "B08301_016",
                      bicycle_commute = "B08301_018",
                      walked_commute = "B08301_019",
                      workhome_commute = "B08301_021",
                      unemployed = "B23025_005",
                      under19_noinsurance = "B27010_017",
                      age19_34_noinsurance = "B27010_033",
                      age35_64_noinsurance = "B27010_050",
                      age65plus_noinsurance = "B27010_066",
                      hisplat_raceethnic = "B03002_012",
                      nonhispLat_white_raceethnic = "B03002_003",
                      nonhispLat_black_raceethnic = "B03002_004",
                      nonhispLat_amerindian_raceethnic = "B03002_005",
                      nonhispLat_asian_raceethnic = "B03002_006",
                      age65_plus  = "B08101_008"),
        year = 2018,
        output = "wide",
        survey = "acs5")
## Getting data from the 2014-2018 5-year ACS
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ACS_Data1 <- ACS_Data %>% #only pull out the estimates and clean variable names
  filter(GEOID %in% ZCTAs_in_NYC) %>%
  dplyr::select(-NAME)  %>%
  dplyr::select(GEOID, !ends_with("M")) %>% #drop the margin-of-error ("M") columns
  rename_at(vars(ends_with("E")), .funs = list(~str_sub(., end = -2))) #strip the trailing "E" from estimate names


# Derive percentage/rate variables from the raw ACS counts.
# NOTE(review): mutate_at() is superseded by mutate(across(...)) in
# current dplyr; kept as-is to preserve the original pipeline exactly.
ACS_Data2 <- ACS_Data1 %>%
  mutate_at(vars(ends_with("_commute")), ~round((./total_commute1)*100, 2)) %>% #proportion of people relying on a given mode of transit
  mutate_at(vars(ends_with("_raceethnic")), ~round((./total_pop1)*100, 2)) %>% #proportion of people reporting a given race/ethnicity
  mutate(not_insured = round(((under19_noinsurance + age19_34_noinsurance + age35_64_noinsurance + age65plus_noinsurance) / total_pop1)*100, 2), #proportion uninsured
         snap_hholds = round((hholds_snap/total_hholds1)*100, 2), #proportion relying on SNAP
         fpl_150 = round(((fpl_100+fpl_100to150)/total_pop1)*100, 2), #proportion 150% or less of FPL
         unemployed = round((unemployed/over16total_industry1)*100, 2), #proportion unemployed
         not_quarantined_jobs = round(((ag_industry+(construct_industry*.25)+wholesaletrade_industry+ #an estimate of who is still leaving the house for work based on industry
                                        (edu_health_socasst_industry*.5)+transpo_and_utilities_industry)/over16total_industry1)*100, 2)) %>%
  dplyr::select(-ends_with("_noinsurance"), -fpl_100, -fpl_100to150, -ends_with("_industry"), -hholds_snap) %>%
  rename(zcta = "GEOID")


#### Estimating the mode of transportation for essential workers ####

# ACS table B08126 cross-tabulates commute mode by industry; pull car-alone
# and public-transit counts for the industries treated as "essential" above.
ACS_EssentialWrkr_Commute <- get_acs(geography = "zcta", #pull down the relevant categories 
                                     variables = c(ag_car1_commute = "B08126_017",
                                                   ag_pubtrans_commute = "B08126_047",
                                                   construct_car1_commute ="B08126_018",
                                                   construct_pubtrans_commute = "B08126_048",
                                                   wholesale_car1_commute = "B08126_020",
                                                   wholesale_pubtrans_commute = "B08126_050",
                                                   transpo_car1_commute = "B08126_022",
                                                   transpo_pubtrans_commute = "B08126_052",
                                                   ed_hlthcare_car1_commute = "B08126_026",
                                                   ed_hlthcare_pubtrans_commute = "B08126_056"),
                                     year = 2018, 
                                     output = "wide",
                                     survey = "acs5")
## Getting data from the 2014-2018 5-year ACS
# Down-weight industries by the same fractions used in not_quarantined_jobs
# (education/healthcare x0.5, construction x0.25), then sum each commute
# mode across industries.
# NOTE(review): in the two mutate_at calls the ", 0)" sits OUTSIDE the
# round() lambda (likely meant round(./2, 0)); harmless because round()'s
# digits default is already 0, but worth tidying.
ACS_EssentialWrkr_Commute1 <- ACS_EssentialWrkr_Commute %>% #clean data and aggregate 
  dplyr::select(-ends_with("M"), -NAME) %>%
  filter(GEOID %in% ZCTAs_in_NYC) %>%
  mutate_at(vars(starts_with("ed_hlthcare")), ~round(./2), 0) %>% #maintain same proportions as estimated non-quarantined jobs
  mutate_at(vars(starts_with("construct")), ~round(./4), 0) %>%
  mutate(essentialworker_drove = rowSums(dplyr::select(., contains("car1_commute"))), 
         essentialworker_pubtrans = rowSums(dplyr::select(., contains("pubtrans")))) %>%
  rename(zcta = GEOID) %>%
  dplyr::select(zcta, essentialworker_drove, essentialworker_pubtrans)


#### Identify the number of supermarkets/grocery stores per area ####
# Alternation regex of store-name words that indicate a non-supermarket
# outlet (delis, pharmacies, liquor stores, cafes, etc.) to exclude below.
non_supermarket_strings <- c("DELI|TOBACCO|GAS|CANDY|7 ELEVEN|7-ELEVEN|LIQUOR|ALCOHOL|BAKERY|CHOCOLATE|DUANE READE|WALGREENS|CVS|RITE AID|RAVIOLI|WINERY|WINE|BEER|CAFE|COFFEE")

food_retail1 <- food_retail %>% #an estimate of how many supermarkets are in a ZCTA
  filter(County %in% NYC_boro_county_match$County) %>% #restrict to the five NYC counties
  filter(str_detect(`Establishment Type`, "J") & str_detect(`Establishment Type`, "A") & str_detect(`Establishment Type`, "C") &
           !str_detect(`Establishment Type`, "H")) %>% #keep J+A+C-coded establishments without an H code — NOTE(review): confirm the letter meanings against the NYS license data dictionary
  filter(!str_detect(`Entity Name`, non_supermarket_strings) & !str_detect(`DBA Name`, non_supermarket_strings)) %>% #get rid of those that have the non_supermarket_strings words in their store & legal names
  filter(`Square Footage`>=4500) %>% #supermarket-scale floor area only
  mutate(zcta = as.character(str_extract(Location, "[:digit:]{5}"))) %>% #first 5-digit run in Location taken as the zip
  group_by(zcta) %>%
  summarise(grocers = n_distinct(`License Number`)) #count distinct licenses per ZCTA
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
### Where are subway stations located? ###

# Station point locations from the MTA.turnstile package, converted to sf
# points and projected to EPSG:2263 to match the other spatial layers.
SubwayStation_shp <- as_tibble(turnstile()$stations) %>%
  st_as_sf(., coords = c("lon", "lat"), crs = 4269) %>%
  st_transform(., crs = 2263) %>%
  filter(!str_detect(ca, "PTH")) #removing New Jersey PATH stations

### Calculate the residential area per ZCTA ###

# Restrict PLUTO to residential land-use categories (codes "01"-"04";
# landuse is compared as zero-padded strings).
Pluto_ResOnly <- Pluto %>%
  filter(landuse>="01" & landuse<="04") %>%
  mutate(base_bbl = as.character(bbl)) %>%
  dplyr::select(-bbl)

ResBBLs <- as.character(Pluto_ResOnly$base_bbl)

# Estimate residential building volume per ZCTA: footprint area x roof
# height per building, apportioned to the residential share of units.
Res_Bldg_Footprints <- Bldg_Footprints %>%
  st_set_geometry(., NULL) %>% #drop geometry; only attributes are needed
  mutate(base_bbl = as.character(base_bbl)) %>%
  filter(base_bbl %in% ResBBLs &
           feat_code == "2100") %>%
  mutate(bldg_volume = shape_area * heightroof) %>%
  left_join(., Pluto_ResOnly, by = "base_bbl") %>%
  # Fallback when roof height is missing: assume 10 per floor (presumably
  # feet, matching heightroof's units — TODO confirm).
  mutate(bldg_volume = if_else(is.na(bldg_volume), shape_area*numfloors*10, bldg_volume),
         res_volume = (bldg_volume/unitstotal)*unitsres, 
         zcta = as.character(zipcode)) %>%
  group_by(zcta) %>%
  summarise(total_res_volume_zcta = sum(res_volume, na.rm = TRUE)) 
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
#### COVID Tests  ####
# Standardize the MODZCTA shapefile's key column name to "zcta" so all
# later joins can use by = "zcta".
MODZCTA_NYC_shp1 <- MODZCTA_NYC_shp %>%
  dplyr::select(modzcta, geometry) %>%
  rename("zcta" = "modzcta")

# Single-day (May 7, 2020) snapshot of cumulative tests/positives per ZCTA.
May7_tests <- ZCTA_test_series %>%
  filter(date=="2020-05-07") %>%
  mutate(zcta = as.character(MODZCTA)) %>%
  rename(total_tests = "Total") %>%
  dplyr::select(zcta, date, Positive, total_tests)

# Borough lookup keyed by zcta, with three zips missing from the scraped
# table added by hand.
ZCTA_by_boro1 <- ZCTA_by_boro %>%
  mutate(boro = as.character(boro),
         zcta = as.character(zip)) %>%
  dplyr::select(-zip) %>%
  bind_rows(., 
            tibble(boro = as.character(c("Manhattan", "Manhattan" ,"Queens")), #correcting NAs
                     zcta = as.character(c("10069", "10282", "11109"))))

# Supplemental Figure 1 (A - Tests): choropleth of cumulative positive
# COVID tests per 100,000 residents by modified ZCTA, as of May 7, 2020.
sfig1a <- MODZCTA_NYC_shp1 %>%
  left_join(., May7_tests, by = "zcta") %>%
  left_join(., ACS_Data2, by = "zcta") %>%
  filter(zcta != "99999") %>% # drop the "unknown" pseudo-ZCTA
  mutate(pos_per_100000 = (Positive/total_pop1)*100000) %>%
  ggplot() +
  geom_sf(data = NYC_basemap_shp)+
  geom_sf(aes(fill = pos_per_100000), lwd = 0.2)+
  labs(fill = "Positives per 100,000") +
  ggtitle("Cumulative Positive COVID tests by zip code (May 7, 2020)") +
  scale_fill_gradientn(colours=brewer_pal("BuPu", type = "seq")(7)) + 
  # The original added theme_bw() twice; a complete theme fully replaces any
  # earlier one, so the first (no-argument) call was redundant and is removed.
  theme_bw(base_size = 6) + 
  theme(legend.title = element_text(face = "bold", size = 6), 
        panel.background = element_rect(fill = "#dedede"), 
        legend.background = element_rect(fill = "transparent"),
        legend.position = c(0.15, 0.80),
        legend.text = element_text(size = 6),
        plot.margin = unit(c(4,0,4,0), "pt"),
        legend.key.size = unit(1.1, "lines"))
sfig1a 

#### Create data frames of all above information ####

# Master analysis table: join every ZCTA-level layer built above onto the
# MODZCTA polygons, then derive the densities, rates, and reciprocal
# transforms used in the BWQS model. (Reciprocals flip variables so that
# larger values correspond to higher hypothesized risk — TODO confirm
# against the modeling write-up.)
ZCTA_ACS_COVID_shp <- MODZCTA_NYC_shp1 %>%
  st_transform(., crs = 2263) %>%
  dplyr::select(zcta, geometry) %>%
  left_join(., ACS_Data2, by = "zcta") %>%
  left_join(., May7_tests, by = "zcta") %>%
  left_join(., Res_Bldg_Footprints, by = "zcta") %>%
  left_join(., ACS_EssentialWrkr_Commute1, by = "zcta") %>%
  left_join(., food_retail1, by = "zcta") %>%
  mutate(pop_density = as.numeric(total_pop1/st_area(geometry)),
         avg_hhold_size = round((total_pop1/total_hholds1), 2),
         pos_per_100000 = (Positive/total_pop1)*100000,
         testing_ratio = (total_tests/total_pop1),
         res_vol_zctadensity = as.numeric(total_res_volume_zcta/st_area(geometry)), 
         res_vol_popdensity = as.numeric(total_pop1/total_res_volume_zcta),
         pubtrans_ferrysubway_commute = pubtrans_subway_commute + pubtrans_ferry_commute,
         grocers = replace_na(grocers, 0), #ZCTAs with no qualifying store get 0
         grocers_per_1000 = (grocers/total_pop1)*1000,
         pos_per_100000 = round(pos_per_100000, 0),
         valid_var = "0", #purpose unclear from this file — presumably a placeholder required downstream; TODO confirm
         didnot_workhome_commute = 1/workhome_commute,
         one_over_grocers_per_1000 = if_else(is.infinite(1/grocers_per_1000), 0, 1/grocers_per_1000), #guard against divide-by-zero
         one_over_medincome = 1/medincome) %>%
  dplyr::select(-pubtrans_subway_commute, -pubtrans_ferry_commute) %>%
  left_join(., ZCTA_by_boro1, by = "zcta") %>%
  mutate_at(vars(starts_with("essentialworker_")), ~round((./over16total_industry1)*100, 2)) %>%
  filter(zcta != "99999") #remove NA pseudo-ZCTA

ZCTA_ACS_COVID <- ZCTA_ACS_COVID_shp %>%
  st_set_geometry(., NULL) #remove geometry for the non-spatial analyses

### Cleaning done --- Now for the Analysis ###


#### Part 1: Creation of BWQS Neighborhood Infection Risk Scores ####

# Step 1: Univariate scatterplots (one per candidate mixture component,
# plus the testing-ratio covariate) to check that each variable's
# association with the outcome runs in a consistent direction; BWQS weights
# are constrained to one direction, so a mixed-sign component would bias it.
ggplot(ZCTA_ACS_COVID, aes(x = testing_ratio, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm") #covariate
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = one_over_medincome, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = not_insured, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm") 
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = one_over_grocers_per_1000, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = unemployed, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm") 
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = res_vol_popdensity, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm") 
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = didnot_workhome_commute, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm") 
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = not_quarantined_jobs, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm") 
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = avg_hhold_size, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = essentialworker_drove, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm") 
## `geom_smooth()` using formula 'y ~ x'

ggplot(ZCTA_ACS_COVID, aes(x = essentialworker_pubtrans, y = pos_per_100000)) + geom_point() + geom_smooth(method = "lm") 
## `geom_smooth()` using formula 'y ~ x'

# The ten socioeconomic mixture components that enter the BWQS index.
SES_vars <- names(ZCTA_ACS_COVID %>% dplyr::select(one_over_medincome, not_insured, one_over_grocers_per_1000, unemployed, 
                                                    not_quarantined_jobs, essentialworker_pubtrans, essentialworker_drove, 
                                                    didnot_workhome_commute, res_vol_popdensity, avg_hhold_size))

#Step 2a: Examine relationships between explanatory variables to make sure nothing >0.9 correlation, as this could bias BWQS
Cors_SESVars <- cor(x = ZCTA_ACS_COVID %>% dplyr::select(all_of(SES_vars)), method = "kendall")
Cors_SESVars1 <- as.data.frame(Cors_SESVars)
Cors_SESVars1$var1 <- row.names(Cors_SESVars1) #carry the row names into a column before reshaping
# Long format: one row per ordered (var1, var2) pair, self-pairs removed.
# NOTE(review): gather() is superseded by pivot_longer() in current tidyr;
# kept because the two produce different row orderings.
Cors_SESVars2 <- gather(data = Cors_SESVars1, key = "var2", value = "correlation", -var1) %>%
  filter(var1 != var2)


## Step 2b: Examine Univariable kendall associations for all selected variables with the outcome
# One row per SES variable: Kendall's tau against the outcome plus the
# cor.test() p-value. Iterating with map_dbl() replaces the superseded
# summarise_at()/t()/as_tibble() pipeline, which also triggered the
# tibble "must have column names" name-repair warning seen in the
# original output.
tibble(
  Variables = SES_vars,
  `Correlation (Tau)` = purrr::map_dbl(
    SES_vars,
    ~ cor(ZCTA_ACS_COVID[[.x]], ZCTA_ACS_COVID$pos_per_100000, method = "kendall")
  ),
  `p value` = purrr::map_dbl(
    SES_vars,
    ~ cor.test(ZCTA_ACS_COVID[[.x]], ZCTA_ACS_COVID$pos_per_100000, method = "kendall")$p.value
  )
) %>%
  mutate(
    `Correlation (Tau)` = round(`Correlation (Tau)`, 3),
    # Report tiny p-values as "< 0.0001" instead of rounding them to 0.
    `p value` = as.character(ifelse(`p value` < 0.0001, "< 0.0001", round(`p value`, 3)))
  )
## # A tibble: 10 x 3
##    Variables                 `Correlation (Tau)` `p value`
##    <chr>                                   <dbl> <chr>    
##  1 one_over_medincome                      0.317 < 0.0001 
##  2 not_insured                             0.231 < 0.0001 
##  3 one_over_grocers_per_1000               0.261 < 0.0001 
##  4 unemployed                              0.296 < 0.0001 
##  5 not_quarantined_jobs                    0.544 < 0.0001 
##  6 essentialworker_pubtrans                0.174 0.001    
##  7 essentialworker_drove                   0.487 < 0.0001 
##  8 didnot_workhome_commute                 0.454 < 0.0001 
##  9 res_vol_popdensity                      0.098 0.052    
## 10 avg_hhold_size                          0.467 < 0.0001
#Step 3: Prepare data for BWQS and pass to stan for model fitting 
# Outcome vector: cumulative positives per 100,000 by ZCTA.
y = as.numeric(ZCTA_ACS_COVID$pos_per_100000)
# Mixture components: the ten SES variables selected above.
X <- ZCTA_ACS_COVID %>%
  dplyr::select(all_of(SES_vars))
# Covariate entered outside the weighted index: testing ratio.
K <- ZCTA_ACS_COVID %>%
  dplyr::select(testing_ratio)
# Recode each SES variable into decile scores for the weighted quantile
# sum. NOTE(review): quantile_split() is not defined in this file —
# presumably from the BWQS helper code; confirm its decile coding.
X <- quantile_split(data = X, mix = SES_vars, q = 10)
data <-as.data.frame(cbind(y,X)) # Aggregate data in a data.frame

# Data list handed to rstan: N ZCTAs, C mixture components, K covariates.
data_list = list(N  = NROW(data), 
                 C  = NCOL(X), 
                 K  = NCOL(K), 
                 XC = cbind(as.matrix(X)), 
                 XK = cbind(K), 
                 Dalp = rep(1,length(SES_vars)), # flat Dirichlet prior on the weights
                 y = as.vector(data$y))

# Fit the BWQS model via NUTS in rstan: one chain, 20,000 iterations
# (2,500 warmup), thinned by 10. The deep max_treedepth and near-1
# adapt_delta trade sampling speed for stability.
# NOTE(review): BWQS_stan_model must be a path defined earlier in the
# script — not visible here; confirm it points at the intended .stan file.
m1 = stan(file = BWQS_stan_model,
                         data = data_list, chains = 1,
                         warmup = 2500, iter = 20000, cores = 1,
                         thin = 10, refresh = 0, algorithm = "NUTS",
                         seed = 1234, control = list(max_treedepth = 20,
                                                     adapt_delta = 0.999999999999999))
## Loading required package: raster
## 
## Attaching package: 'raster'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:nlme':
## 
##     getData
## The following objects are masked from 'package:MASS':
## 
##     area, select
## The following object is masked from 'package:rstan':
## 
##     extract
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:tidyr':
## 
##     extract
# detach("package:raster", unload = TRUE) # may be needed
# Model fit diagnostics (WAIC / LOO). NOTE(review): extract_waic() is not
# defined in this file — presumably from the BWQS helper code; confirm.
extract_waic(m1)
## $waic
##    waic 
## 2431.79 
## 
## $elpd_waic
## elpd_waic 
## -1215.895 
## 
## $p_waic
##  p_waic 
## 11.7556 
## 
## $elpd_loo
##  elpd_loo 
## -1215.992 
## 
## $p_loo
##    p_loo 
## 11.85288
# Parameter labels in stan-summary order: dispersion (phi), intercept
# (beta0), BWQS-index slope (beta1), testing-ratio coefficient (delta1),
# then one mixture weight per SES variable.
vars = c("phi", "beta0", "beta1", "delta1", SES_vars)
parameters_to_drop <- c("phi", "delta1", "beta0", "beta1")
number_of_coefficients <- length(SES_vars) + 4

# Posterior mean and 95% credible bounds (columns 1, 4, 8 of the summary).
BWQS_params <- bind_cols(as_tibble(summary(m1)$summary[1:number_of_coefficients,c(1,4,8)]), label = vars)

BWQS_fits <- BWQS_params %>%
  rename(lower = "2.5%",
         upper = "97.5%") %>%
  filter(!label %in% parameters_to_drop) %>%
  arrange(desc(mean)) %>%
  # case_when() replaces the triple-nested if_else() chain; conditions
  # are evaluated top-to-bottom, assigning each variable to its
  # thematic facet group.
  mutate(group = factor(
    case_when(
      label %in% c("one_over_medincome", "not_insured", "unemployed") ~ "Finances &\nAccess to care",
      label == "one_over_grocers_per_1000" ~ "Food\nAccess",
      str_detect(label, "essential") | label %in% c("not_quarantined_jobs", "didnot_workhome_commute") ~ "Commuting and\nEssential Work",
      label %in% c("avg_hhold_size", "res_vol_popdensity") ~ "Population\nDensity",
      TRUE ~ "Unmatched"
    ),
    levels = c("Commuting and\nEssential Work", "Finances &\nAccess to care", "Population\nDensity", "Food\nAccess")
  ))

# Display labels for the SES variables (names must match SES_vars).
labels1 <- c("one_over_medincome" = "1/\nMedian income", 
             "not_insured" = "Uninsured", 
             "unemployed" = "Unemployed", 
             "one_over_grocers_per_1000" = "1/\nGrocers per 1000",
             "not_quarantined_jobs" = "Essential Workers", 
             "essentialworker_pubtrans" = "Essential Worker:\nPublic Transit", 
             "essentialworker_drove" = "Essential Worker:\nDriving Commute", 
             "didnot_workhome_commute" = "1/\nWork from home", 
             "res_vol_popdensity" = "Population Density\nby Residential Volume", 
             "avg_hhold_size" = "Average people\nper household")


# Figure 2: posterior mean BWQS weight with 95% credible interval per
# variable, faceted by thematic group.
fig2 <- ggplot(data=BWQS_fits, aes(x= reorder(label, mean), y=mean, ymin=lower, ymax=upper)) +
  geom_pointrange() + 
  coord_flip() + 
  xlab("") + 
  ylab("Mean (95% credible intervals)") +
  scale_x_discrete(labels = labels1) + 
  # NOTE(review): theme_set() sets the *global* default theme and returns
  # the previous theme, which is what actually gets added to this plot;
  # `+ theme_bw(base_size = 18)` was probably intended. Left unchanged
  # because later plots with no explicit theme (e.g. fig4) would inherit
  # the new global default — confirm before fixing.
  theme_set(theme_bw(base_size = 18)) +
  facet_grid(group~., scales = "free", space = "free") +
  theme(strip.text.x = element_text(size = 14))
print(fig2)
if(export.figs) ggsave(fig2, filename = here("figures", paste0("fig2", "_", Sys.Date(),".png")), dpi = 600, width = 8, height = 8)

# Re-check pairwise Kendall correlations after decile-splitting, since
# BWQS operates on the quantile-scored variables.
Cors_SESVars_quantiled <- cor(X, method = "kendall")  
Cors_SESVars_quantiled1 <- as.data.frame(Cors_SESVars_quantiled)
Cors_SESVars_quantiled1$var1 <- row.names(Cors_SESVars_quantiled1)
# pivot_longer() supersedes gather(); drop self-correlations.
Cors_SESVars_quantiled2 <- pivot_longer(data = Cors_SESVars_quantiled1, cols = -var1,
                                        names_to = "var2", values_to = "correlation") %>%
  filter(var1 != var2)

#Step 4: Use the variable-specific weight on the decile quantile splits to create a 10 point ZCTA-level infection risk score  

# Posterior mean mixture weights: rows 5..(4 + length(SES_vars)) of the
# stan summary, i.e. everything after phi/beta0/beta1/delta1.
BWQS_weights <- as.numeric(summary(m1)$summary[5:number_of_coefficients,c(1)])

# Multiply each decile-scored column of X by its weight.
# Fix: index the weights with col(X), not col(ZCTA_ACS_COVID) — the two
# objects have different column counts, so the original produced an
# index matrix that was not conformable with X.
ZCTA_ACS_COVID2 <- X * BWQS_weights[col(X)]

# BWQS index = row sum of the weighted decile scores (0-10 scale).
BWQS_index <- ZCTA_ACS_COVID2 %>% 
  dplyr::mutate(BWQS_index = rowSums(.)) %>% 
  dplyr::select(BWQS_index) 

# Model-predicted infection rate across the observed BWQS index, holding
# testing ratio at its median (log link, hence exp()).
BWQS_predicted_infection_median_testing = exp(BWQS_params[BWQS_params$label == "beta0", ]$mean + 
  (BWQS_params[BWQS_params$label == "beta1", ]$mean * BWQS_index) + 
  (BWQS_params[BWQS_params$label == "delta1", ]$mean * median(K$testing_ratio)))
colnames(BWQS_predicted_infection_median_testing) <- "predicted"

# Visualize the relationship between BWQS index and infection rate,
# overlaying the model-predicted curve at the median testing ratio.
BWQS_scatter <- ggplot(data.frame(BWQS_index, y, BWQS_predicted_infection_median_testing), aes(BWQS_index, y)) + geom_point() + 
  geom_line(aes(y = predicted)) + 
  scale_x_continuous("BWQS infection risk index") + 
  # Fix: spell out the full `labels` argument (the original relied on
  # partial matching of `label`) and namespace comma() explicitly to
  # scales, which is not attached.
  scale_y_continuous("Infections per 100,000", labels = scales::comma)
# Add marginal histograms on both axes.
BWQS_scatter <- ggExtra::ggMarginal(BWQS_scatter, type = "histogram", xparams = list(binwidth = 1), yparams = list(binwidth = 200))
print(BWQS_scatter)

# ggMarginal objects are grobs, not ggplots, so export via png()/print()
# rather than ggsave().
if(export.figs) {
  png(filename = here("figures", paste0("fig1_", Sys.Date(), ".png")), width = 96*5, height = 96*5)
  print(BWQS_scatter)
  dev.off()
}

# Attach the BWQS index to the ZCTA shapefile for mapping.
# NOTE(review): bind_cols() pairs rows positionally; this assumes the
# shapefile rows are in the same order as ZCTA_ACS_COVID (from which
# BWQS_index was computed) — confirm, or prefer a keyed join.
ZCTA_BWQS_COVID_shp <- ZCTA_ACS_COVID_shp %>% bind_cols(., BWQS_index)

#Step 5: Visualize the spatial distribution of ZCTA-level infection risk scores 
# Figure 3
# Choropleth of the BWQS index; 7-step sequential YlGnBu ColorBrewer
# ramp (scales::brewer_pal), legend drawn inside the map panel.
fig3 <- ggplot(ZCTA_BWQS_COVID_shp) + 
  geom_sf(aes(fill = BWQS_index), lwd = 0.2) + 
  scale_fill_gradientn(colours=brewer_pal("YlGnBu", type = "seq")(7)) + 
  #theme_bw(base_size = 15) + 
  theme_bw(base_size = 5) + 
  labs(fill = "BWQS infection risk index") +
  theme(legend.title = element_text(face = "bold", size = 7), 
        panel.background = element_rect(fill = "#dedede"), 
        legend.background = element_rect(fill = "transparent"),
        legend.position = c(0.25, 0.80),
        legend.text = element_text(size = 6),
        legend.key.size = unit(1.1, "lines"))

fig3

if(export.figs) ggsave(plot = fig3, filename = here("figures", paste0("fig3","_",Sys.Date(),".png")), dpi = 600, device = "png", width = 4.5, height = 3.7)

#Step 6: Compare quantile distribution of ZCTA-level BWQS scores by the race/ethnic composition of residents  
# Per-ZCTA population counts by race/ethnicity; "other" is the remainder
# of total population not covered by the five ACS race/ethnicity columns
# (columns 2-6 after the select()).
Demographics <- ACS_Data1 %>% rename(zcta = "GEOID") %>%
  dplyr::select(zcta,ends_with("_raceethnic"), total_pop1) %>%
  mutate(other_raceethnic = total_pop1 - (rowSums(.[2:6])))

# BWQS index per ZCTA, geometry dropped for a plain join table.
ZCTA_BQWS <- ZCTA_BWQS_COVID_shp %>%
  st_set_geometry(., NULL) %>%
  dplyr::select(zcta, BWQS_index)

Demographics_for_ridges <- Demographics %>%
  left_join(., ZCTA_BQWS, by = "zcta") %>%
  dplyr::select(-total_pop1) %>%
  # One row per ZCTA x race/ethnic group. pivot_longer() supersedes
  # gather().
  pivot_longer(cols = hisplat_raceethnic:other_raceethnic,
               names_to = "Race/Ethnicity", values_to = "Population") %>%
  # case_when() replaces the nested if_else() chain mapping ACS column
  # names to display labels; factor levels order the ridges.
  mutate(`Race/Ethnicity` = case_when(
           `Race/Ethnicity` == "hisplat_raceethnic" ~ "Hispanic/Latinx",
           `Race/Ethnicity` == "nonhispLat_black_raceethnic" ~ "Black",
           `Race/Ethnicity` == "nonhispLat_white_raceethnic" ~ "White",
           `Race/Ethnicity` == "nonhispLat_asian_raceethnic" ~ "Asian",
           TRUE ~ "Other"
         ),
         `Race/Ethnicity` = factor(`Race/Ethnicity`, levels = c( "White",  "Asian", "Other","Hispanic/Latinx","Black"))) 


# Population-weighted mean and median BWQS index by race/ethnic group —
# i.e., the BWQS exposure of the average member of each group, weighting
# each ZCTA by that group's population there.
# NOTE(review): weightedMedian() looks like matrixStats::weightedMedian;
# matrixStats is not attached in the visible library() calls — confirm it
# is loaded earlier in the script.
Demographics_for_ridges %>%
  group_by(`Race/Ethnicity`) %>%
  summarise(weighted.mean(BWQS_index, Population),
            weightedMedian(BWQS_index, Population))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 3
##   `Race/Ethnicity` `weighted.mean(BWQS_index, Po… `weightedMedian(BWQS_index, P…
##   <fct>                                     <dbl>                          <dbl>
## 1 White                                      3.80                           4.24
## 2 Asian                                      4.86                           5.50
## 3 Other                                      4.99                           5.67
## 4 Hispanic/Latinx                            5.52                           5.94
## 5 Black                                      5.77                           6.07
# Figure 4: population-weighted density ridges of the BWQS index by
# race/ethnic group.
fig4 <- ggplot(Demographics_for_ridges,
       aes(x = BWQS_index, y = `Race/Ethnicity`)) + 
  xlab("BWQS infection risk index")+
  theme(legend.position = "none") +
  # after_stat(density) replaces the deprecated ..density.. notation
  # (ggplot2 >= 3.3.0); each ridge is weighted by the group's share of
  # the total population.
  geom_density_ridges(
    aes(height = after_stat(density),  
        weight = Population / sum(Population)),
    scale = 0.95,
    stat ="density") 
fig4

if(export.figs) ggsave(plot = fig4, filename = here("figures", paste0("fig4","_",Sys.Date(),".png")), dpi = 400, device = "png", width = 8, height = 5)

# Helper: collapse a set of ZCTAs to citywide counts, convert the
# race/ethnicity counts to percentages of total population, and tag the
# stratum label. Replaces three near-identical pipelines that used the
# deprecated summarise_all(funs())/mutate_at(funs()) idiom with across().
summarise_race_pct <- function(df, group_label) {
  df %>%
    dplyr::select(-zcta) %>%
    summarise(across(everything(), sum)) %>%
    mutate(across(ends_with("_raceethnic"), ~ round((.x / total_pop1) * 100, 2))) %>%
    mutate(Group = group_label)
}

# NOTE(review): the strata use strict inequalities, so a ZCTA whose
# BWQS_index equals exactly the 25th or 75th percentile falls into no
# stratum — confirm this is intended.
Below_25th_zctas <- ZCTA_BQWS %>%
  filter(BWQS_index < quantile(BWQS_index, .25))

Race_Ethncity_below25th <- Demographics %>%
  filter(zcta %in% Below_25th_zctas$zcta) %>%
  summarise_race_pct("Below 25th percentile BWQS")


Between_25thand75th_zctas <- ZCTA_BQWS %>%
  filter(BWQS_index > quantile(BWQS_index, .25) & BWQS_index < quantile(BWQS_index, .75))

Race_Ethncity_btw_25th75th <- Demographics %>%
  filter(zcta %in% Between_25thand75th_zctas$zcta) %>%
  summarise_race_pct("Between 25-75th percentile BWQS")

Above_75th_zctas <- ZCTA_BQWS %>%
  filter(BWQS_index > quantile(BWQS_index, .75))

Race_Ethncity_above_75th <- Demographics %>%
  filter(zcta %in% Above_75th_zctas$zcta) %>%
  summarise_race_pct("Above 75th percentile BWQS")

# Citywide reference row (all ZCTAs).
Race_Ethncity_all <- Demographics %>%
  summarise_race_pct("NYC Population")

# Combine the strata, fold American Indian counts into "Other", reshape
# long for plotting, and fix factor ordering. pivot_longer(-Group)
# replaces gather() with its magic column positions (1:4, 6).
Demographics_by_BWQS <- bind_rows(Race_Ethncity_all, Race_Ethncity_below25th, Race_Ethncity_btw_25th75th, Race_Ethncity_above_75th) %>%
  mutate(Other = other_raceethnic + nonhispLat_amerindian_raceethnic)  %>%
  dplyr::select(-other_raceethnic, -nonhispLat_amerindian_raceethnic, - total_pop1) %>%
  rename("Hispanic/Latinx" = "hisplat_raceethnic",
         "Black" = "nonhispLat_black_raceethnic", 
          "White" = "nonhispLat_white_raceethnic", 
         "Asian" = "nonhispLat_asian_raceethnic") %>%
  pivot_longer(cols = -Group, names_to = "Race/Ethnicity", values_to = "Proportion") %>%
  mutate(`Race/Ethnicity` = factor(`Race/Ethnicity`, levels = c("Other", "Asian", "White", "Hispanic/Latinx", "Black")),
         Group = factor(Group, levels = c("NYC Population", "Below 25th percentile BWQS", "Between 25-75th percentile BWQS", "Above 75th percentile BWQS")))

# Two-line x-axis labels for the stratum bars.
labels_demographics <- c("NYC Population" = "NYC Population", "Below 25th percentile BWQS" = "Below 25th\npercentile BWQS", 
                         "Between 25-75th percentile BWQS" = "Between 25-75th\npercentile BWQS", "Above 75th percentile BWQS" = "Above 75th\npercentile BWQS")

# Stacked bars of race/ethnic composition by BWQS stratum. The
# "NYC Population" reference bar is drawn narrower inside a gray
# rectangle; mapping the literal string "gray85" inside aes() creates an
# extra legend level, which is why `values` has six colors for five
# breaks — fragile but intentional here.
sfig2 <- ggplot(Demographics_by_BWQS, aes(fill=`Race/Ethnicity`, y=Proportion, x=Group)) + 
    geom_rect(data = subset(Demographics_by_BWQS, Group=="NYC Population"), 
            aes(xmin=as.numeric(Group)-.35,xmax=as.numeric(Group)+.35, ymin=0, ymax=100, fill="gray85"), color = "gray", alpha = .1) +
  geom_bar(data = subset(Demographics_by_BWQS, Group!="NYC Population"), position="stack", stat="identity", width = .75) +
  geom_bar(data = subset(Demographics_by_BWQS, Group=="NYC Population"), position="stack", stat="identity", width = .45) +
  scale_fill_manual(breaks = c("Other", "Asian", "White", "Hispanic/Latinx", "Black"), 
                    values = c("#984ea3","#ff7f00","gray85", "#4daf4a", "#e41a1c", "#377eb8"))  +
  # Label only segments of at least 5% with their rounded percentage.
  geom_text(aes(label=ifelse(Proportion >= 5, paste0(sprintf("%.0f", Proportion),"%"),"")),
            position=position_stack(vjust=0.5), colour="black", size = 8) + 
  scale_y_continuous("Percentage", expand = c(0,0), labels = function(x) paste0(x, "%")) + 
  scale_x_discrete(limits = c( "NYC Population", "Below 25th percentile BWQS", "Between 25-75th percentile BWQS", "Above 75th percentile BWQS"), 
                   labels = labels_demographics) + 
  theme_bw(base_size = 16) +
  theme(legend.text = element_text(size = 14),
        axis.text.y = element_text(size = 16),
        axis.text.x = element_text(size = 16, color = "black"), 
        axis.title.x = element_blank()) 
sfig2

if(export.figs) ggsave(sfig2, filename = here("figures", paste0("sfig2","_",Sys.Date(),".png")), device = "png", dpi = 500, width = 12, height = 6)



#### Step 2: Compare capacity to socially distance (as measured by transit data) by neighborhood-level risk scores ####  

# Crosswalk from zip/ZCTA to United Hospital Fund (UHF) neighborhoods.
UHF_ZipCodes1 <- UHF_ZipCodes %>%
  mutate(zcta = as.character(zip),
         uhf = as.character(code)) %>%
  dplyr::select(zcta, uhf)
  
ZCTA_BWQS_score <- ZCTA_BWQS_COVID_shp %>%
  st_drop_geometry() %>%
  dplyr::select(zcta, BWQS_index)
                
# Population-weighted mean BWQS index per UHF neighborhood.
# Fix: the second join used `join = "zcta"`, which is not a left_join()
# argument — it was silently swallowed by `...` and dplyr fell back to a
# natural join (hence the "Joining, by = 'zcta'" message in the original
# output). `by = "zcta"` makes the key explicit.
UHF_BWQS <- ZCTA_ACS_COVID %>%
  left_join(., UHF_ZipCodes1, by = "zcta") %>%
  left_join(., ZCTA_BWQS_score, by = "zcta") %>%
  group_by(uhf) %>%
  summarise(uhf_weighted_bwqs = weighted.mean(BWQS_index, total_pop1)) #population weighting

# Median split of neighborhood-level risk onto the UHF shapefile.
UHF_BWQS_COVID_shp <- UHF_shp %>% 
  mutate(uhf = as.character(UHFCODE)) %>%
  left_join(., UHF_BWQS, by = "uhf") %>%
  mutate(Risk = factor(ifelse(uhf_weighted_bwqs > median(uhf_weighted_bwqs, na.rm = T), #median split of neighborhood risk
                              "High", "Low"), levels = c("High", "Low")))

# Quick map of the UHF-level weighted BWQS index.
ggplot(UHF_BWQS_COVID_shp) + 
  geom_sf(aes(fill = uhf_weighted_bwqs))

# Citywide ("all") relative subway ridership, Jan 29 - Apr 30 2020.
Subway_ridership_by_UHF %>% filter(place=="all" & date >= "2020-01-29" & date <= "2020-04-30") %>% 
           ggplot() + geom_point(aes(date, usage.median.ratio, color = place))

Mean_Ridership <- Subway_ridership_by_UHF %>% #Use the mean ridership to identify the proper function for a nonlinear model
  filter(date >= "2020-01-29" & date <= "2020-04-30" & place=="all") %>%   
  arrange(date) %>%
  # NOTE(review): time() is the stats ts generic; applied to a plain
  # vector it appears to yield a 1..n sequential index, making
  # time_index a day counter — confirm.
  mutate(time_index = time(date))

Mean_Ridership %>% 
  ggplot() + geom_point(aes(date, usage.median.ratio))

#The weibull probability distribution works best for these data
# Four-parameter type-2 Weibull dose-response curve (drc::W2.4) of
# ridership against the time index.
fit_drm_w2.4 <- drm(usage.median.ratio ~ time_index, fct =  W2.4(), data = Mean_Ridership)

# suppress warning about vector recycling in predict.drc.R
handler <- function(w) if( any( grepl( "Recycling array of length 1 in array-vector arithmetic is deprecated.", w) ) ) 
  invokeRestart( "muffleWarning" )

# Fitted values with confidence band, bound column-wise onto the data.
DRM_mean_predictions <- bind_cols(Mean_Ridership,
                                  as_tibble(withCallingHandlers(predict(fit_drm_w2.4, interval = "confidence"), warning = handler ))) 

# Supplemental Figure 4: observed citywide ridership with the Weibull
# fit and its confidence band. DRM_mean_predictions already carries the
# Mean_Ridership columns plus Prediction/Lower/Upper, so all layers map
# its columns directly instead of reaching into other data frames with
# `$` inside aes() as the original did (an anti-pattern that breaks
# faceting and silently misaligns rows).
sfig4 <- ggplot(DRM_mean_predictions, aes(x = date)) +
  geom_point(aes(y = usage.median.ratio)) + 
  geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "grey50", alpha = .5) +
  geom_line(aes(y = Prediction), color = "red") + 
  theme_bw(base_size = 16) +
  xlab("Date") +
  ylab("Relative Subway Use (%)")
sfig4

if(export.figs) ggsave(sfig4, filename = here("figures", paste0("sfig4", "_", Sys.Date(), ".png")), device = "png", dpi = 400, width = 8, height = 5)

#create a dataframe for the analysis 
# Dates with known service changes in two low-subway-density UHF areas
# (403 and 502); these artificially low weekend observations are removed
# below via anti_join().
service_changes_in_lowsubway_areas <- tibble(date = as.Date(c("2020-02-01", "2020-02-02", "2020-02-08", "2020-02-09", "2020-02-15", "2020-02-16", "2020-02-22", "2020-02-23", "2020-02-29", "2020-03-01", "2020-03-07", "2020-03-08", 
                                                      "2020-02-01", "2020-02-02", "2020-02-08", "2020-02-09", "2020-02-16", "2020-02-16")),
                                             place = c("403","403","403","403","403","403","403","403","403","403","403","403", 
                                                       "502","502","502","502","502","502"))

# Per-UHF ridership joined to the BWQS risk split, restricted to
# Jan 29 - Apr 30, with the Presidents' Day holiday and the
# service-change outliers removed.
# NOTE(review): group_by(place) is never ungrouped, so the frame stays
# grouped downstream (drm() ignores grouping, but be aware). time_index
# is computed within place after sorting by date.
Subway_BWQS_df <- Subway_ridership_by_UHF %>%
  left_join(., st_set_geometry(UHF_BWQS_COVID_shp, NULL), by = c("place" = "uhf")) %>%
  filter(!is.na(place) & date >= "2020-01-29" & date <= "2020-04-30") %>%
  group_by(place) %>%
  arrange(date) %>%
  mutate(time_index = time(date)) %>%
  filter(!is.na(Risk) & date != "2020-02-17") %>% #removing Presidents' Day national holiday 
  anti_join(., service_changes_in_lowsubway_areas, by = c("date", "place")) #removing low outliers due to service changes in low subway density areas

# Pooled Weibull curve vs curves that vary by BWQS Risk group.
fit_drm_all <- drm(usage.median.ratio ~ time_index, fct = W2.4(), data = Subway_BWQS_df)
fit_drm_interact <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_df)

# F-test: allowing the four Weibull parameters to differ by Risk group
# fits significantly better than one shared curve.
anova(fit_drm_all, fit_drm_interact) #comparing the mean only model to the interaction model 
## 
## 1st model
##  fct:      W2.4()
##  pmodels: 1 (for all parameters)
## 2nd model
##  fct:      W2.4()
##  pmodels: Risk (for all parameters)
## ANOVA table
## 
##           ModelDf    RSS Df F value p value
## 1st model    3291 21.283                   
## 2nd model    3287 16.868  4  215.09    0.00
summary(fit_drm_interact)
## 
## Model fitted: Weibull (type 2) (4 parms)
## 
## Parameter estimates:
## 
##           Estimate  Std. Error t-value   p-value    
## b:Low  -11.2825944   0.3456725 -32.639 < 2.2e-16 ***
## b:High -10.9784189   0.3294499 -33.324 < 2.2e-16 ***
## c:Low    0.0917715   0.0031436  29.193 < 2.2e-16 ***
## c:High   0.1631823   0.0032886  49.621 < 2.2e-16 ***
## d:Low    0.9760230   0.0028087 347.505 < 2.2e-16 ***
## d:High   0.9657486   0.0026961 358.199 < 2.2e-16 ***
## e:Low   44.7693331   0.0958824 466.919 < 2.2e-16 ***
## e:High  46.6277655   0.1033039 451.365 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error:
## 
##  0.07163616 (3287 degrees of freedom)
confint(fit_drm_interact)
##               2.5 %       97.5 %
## b:Low  -11.96034961 -10.60483918
## b:High -11.62436663 -10.33247119
## c:Low    0.08560791   0.09793519
## c:High   0.15673435   0.16963017
## d:Low    0.97051611   0.98152991
## d:High   0.96046233   0.97103483
## e:Low   44.58133784  44.95732828
## e:High  46.42521898  46.83031200
# Low-minus-High differences in individual parameters: the scale
# parameter b does not differ, the lower asymptote c does.
compParm(fit_drm_interact, "b", "-")
## 
## Comparison of parameter 'b' 
## 
##          Estimate Std. Error t-value p-value
## Low-High -0.30418    0.47752  -0.637  0.5242
compParm(fit_drm_interact, "c", "-")
## 
## Comparison of parameter 'c' 
## 
##            Estimate Std. Error t-value   p-value    
## Low-High -0.0714107  0.0045494 -15.697 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#b is not actually a slope - but a scaling factor that needs to be transformed into the slope
# Transform the Weibull scale parameter b into the slope at the curve's
# inflection point: slope = -b * (-d / (4 * e)). One row per Risk group.
slopes <- as_tibble(coef(fit_drm_interact), rownames = "vars") %>%
  separate(col = vars, into = c("vars", "BWQS_risk"), sep = ":") %>%
  # pivot_wider() supersedes spread().
  pivot_wider(names_from = vars, values_from = value) %>%
  mutate(slope = -b*(-d/(4*e)))

# Apply the same transformation to the 2.5% / 97.5% confidence limits of
# b to get an interval around each group's slope.
as_tibble(confint(fit_drm_interact), rownames = "vars") %>%
  separate(col = vars, into = c("vars", "BWQS_risk"), sep = ":") %>%
  filter(vars == "b") %>%
  right_join(., slopes, by = "BWQS_risk") %>%
  mutate(slope_lower_ci = -`2.5 %`*(-d/(4*e)),
         slope_higher_ci = -`97.5 %`*(-d/(4*e))) %>%
  dplyr::select(BWQS_risk, slope, slope_lower_ci, slope_higher_ci)
## # A tibble: 2 x 4
##   BWQS_risk   slope slope_lower_ci slope_higher_ci
##   <chr>       <dbl>          <dbl>           <dbl>
## 1 Low       -0.0615        -0.0652         -0.0578
## 2 High      -0.0568        -0.0602         -0.0535
# Fitted values + confidence band from the interaction model, bound
# column-wise to the analysis data (same warning muffler as above).
fit_drm_predictions <- as_tibble(withCallingHandlers(predict(fit_drm_interact, interval = "confidence"), warning = handler ))
Subway_BWQS_df1 <- bind_cols(Subway_BWQS_df, fit_drm_predictions) 

Subway_BWQS_df2 <- Subway_BWQS_df1 %>%
  filter(date>"2020-02-16") %>% #subsetting for visualization
  mutate(Risk = if_else(Risk == "High", "High (above median)", "Low (below median)"))

# Figure 5: horizontally jittered ridership points with fitted curves
# and CIs by risk group. The dashed vertical line marks 2020-03-22
# (presumably the NY stay-at-home order — confirm).
fig5 <- ggplot() + 
  geom_jitter(data = Subway_BWQS_df2, aes(x = date, y = usage.median.ratio, color = Risk), alpha = .5, position = position_jitter(height = 0, width = 0.4))+ 
  geom_ribbon(data = subset(Subway_BWQS_df2, Risk == "High (above median)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
  geom_ribbon(data = subset(Subway_BWQS_df2, Risk == "Low (below median)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
  geom_line(data = Subway_BWQS_df2, aes(x = date, y = Prediction, color = Risk)) +
  scale_x_date("Date", date_minor_breaks = "1 week") + 
  scale_y_continuous("Relative Subway Ridership (%)", labels = scales::percent) + 
  geom_vline(xintercept = date("2020-03-22"),color = "grey30", lty = 2) + 
  theme_bw(base_size = 16) +
  labs(colour="BWQS Infection Risk Index") +
  theme(legend.title = element_text(face = "bold", size = 12), legend.position = c(0.8, 0.7))  
fig5 

if(export.figs) ggsave(fig5, filename = here("figures", paste0("fig5", "_", Sys.Date() ,".png")), dpi = 600, width = 8, height = 6)

#which ones were dropped?
# UHF areas present in the analysis dataset; areas missing from it are
# starred on the map below.
included_uhf <- Subway_BWQS_df %>% distinct(UHFCODE)
notincluded_uhf_shp <- UHF_BWQS_COVID_shp %>%
  filter(!UHFCODE %in% included_uhf$UHFCODE & 
           UHFCODE !=0) %>% # 0 appears to be non-neighborhood filler geometry — confirm
  mutate(NotIncluded = "*")

# Supplemental Figure 3: high/low BWQS risk by UHF area, with subway
# stations overlaid and excluded areas marked "*".
sfig3 <- ggplot() + 
  geom_sf(data = NYC_basemap_shp) +
  geom_sf(data = subset(UHF_BWQS_COVID_shp, !is.na(Risk)), aes(fill = Risk)) + 
  geom_sf(data = SubwayStation_shp) +
  geom_sf_text(data = notincluded_uhf_shp, aes(label = NotIncluded), size = 9) +
  xlab("") + ylab("") +
  theme_bw()
sfig3

if(export.figs) ggsave(sfig3, filename = here("figures", paste0("sfig3", "_", Sys.Date(),".png")), dpi = 500)



#### Part 3: Spatial analysis of mortality in relation to BWQS scores  ####

#Step 1: Create dataframes with the relevant information 
# Per-ZCTA cumulative COVID deaths (counts and rates) as of 23 May 2020,
# expanded from modified ZCTAs to their component ZCTAs, one row per ZCTA.
deaths_by23May2020_by_zcta1 <- deaths_by23May2020_by_zcta %>%
  left_join(modzcta_to_zcta, by = c("MODIFIED_ZCTA" = "MODZCTA")) %>%
  mutate(zcta = as.character(MODIFIED_ZCTA)) %>%
  rename(deaths_count = "COVID_DEATH_COUNT") %>%
  dplyr::select(zcta, deaths_count, COVID_DEATH_RATE) %>%
  distinct(zcta, .keep_all = TRUE)

# Attach BWQS scores and mortality to the ZCTA shapefile; prop_65plus is
# the share of residents aged 65 and over.
ZCTA_BWQS_COVID_shp1 <- ZCTA_ACS_COVID_shp %>%
  left_join(ZCTA_BWQS_score, by = "zcta") %>%
  left_join(deaths_by23May2020_by_zcta1, by = "zcta") %>%
  mutate(prop_65plus = age65_plus / total_pop1,
         zcta = as.numeric(zcta))

# Supplemental Figure 1 (B - Mortality)
# Choropleth of cumulative COVID mortality per 100,000 by ZCTA, 7-step
# sequential BuPu ColorBrewer ramp.
sfig1b <- ZCTA_BWQS_COVID_shp1 %>% 
  ggplot() +
  geom_sf(data = NYC_basemap_shp)+
  geom_sf(aes(fill = COVID_DEATH_RATE), lwd = 0.2)+
  labs(fill = "Mortality per 100,000") +
  ggtitle("Cumulative COVID Mortality by zip code (May 23, 2020)") +
  scale_fill_gradientn(colours=brewer_pal("BuPu", type = "seq")(7)) + 
  theme_bw(base_size = 6) + 
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.title = element_text(face = "bold", size = 6), 
        panel.background = element_rect(fill = "#dedede"), 
        legend.background = element_rect(fill = "transparent"),
        legend.position = c(0.15, 0.80),
        legend.text = element_text(size = 6),
        legend.key.size = unit(1.1, "lines"),
        plot.margin = unit(c(4,0,4,0), "pt"),
        )
sfig1b

# Combine with panel A and export.
# NOTE(review): sfig1a, ggarrange() and ggexport() (ggpubr) are not
# defined/attached in the visible portion of the script — they must come
# from earlier code; confirm.
sfig1 <- ggarrange(sfig1a, sfig1b, nrow = 1)
ggexport(sfig1, filename = here("figures", paste0("sfig1", "_", Sys.Date(),".png")), res = 300, width = 7.3*300, height = 3.7*300)
## file saved to /home/carrid08/COVID_19_admin_disparities/figures/sfig1_2020-06-21.png
## file saved to /home/carrid08/COVID_19_admin_disparities/figures/sfig1_2020-06-21.png
#Step 2: Run negative binomial model with spatial filtering  

#Step 2a: Identify the neighbor relationships 
# Convert sf -> sp and build a symmetric 4-nearest-neighbour,
# row-standardised spatial weights list.
spdat <- as_Spatial(ZCTA_BWQS_COVID_shp1)
ny.nb4 <- knearneigh(coordinates(spdat), k=4)
ny.nb4 <- knn2nb(ny.nb4)
ny.nb4 <- make.sym.nb(ny.nb4) # k-NN relations are asymmetric; force symmetry
ny.wt4 <- nb2listw(ny.nb4, style="W")

#Step 2b: Fit the model to identify the component of the data with substantial spatial autocorrelation
# Negative binomial model of death counts with a population offset,
# scaled proportion 65+, and the BWQS index; Moran's test on the
# residuals shows significant residual spatial autocorrelation.
fit.nb.ny<-glm.nb(deaths_count~offset(log(total_pop1))+scale(age65_plus/total_pop1)+BWQS_index, spdat)
lm.morantest(fit.nb.ny, listw = ny.wt4)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: glm.nb(formula = deaths_count ~ offset(log(total_pop1)) +
## scale(age65_plus/total_pop1) + BWQS_index, data = spdat, init.theta =
## 6.762237256, link = log)
## weights: ny.wt4
## 
## Moran I statistic standard deviate = 2.028, p-value = 0.02128
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.096911775     -0.002287038      0.002392612
# Moran eigenvector filtering: select eigenvectors of the spatial
# weights that absorb the residual autocorrelation.
# NOTE(review): theta is hard-coded as 6.804 but the glm.nb fit above
# reports init.theta = 6.762 — confirm which estimate was intended.
me.fit <- spatialreg::ME(deaths_count~offset(log(total_pop1))+scale(age65_plus/total_pop1)+BWQS_index,
                         spdat@data, family=negative.binomial(6.804), listw = ny.wt4, verbose=T, alpha=.1, nsim = 999)
## eV[,22], I: 0.02013214 ZI: NA, pr(ZI): 0.283
#Step 2c: Pull out these fits and visualize the autocorrelation
fits <- data.frame(fitted(me.fit))
# NOTE(review): "vec22" matches the eigenvector reported by ME() in this
# run (eV[,22]); the column name will change if the data or seed change.
spdat$me22 <- fits$vec22
spplot(spdat, "me22", at=quantile(spdat$me22, p=seq(0,1,length.out = 7)))

#Step 2d: Include the fits in our regression model as an additional parameter
# Refit the negative binomial model with the selected Moran eigenvector as a
# spatial-filter covariate; exp(estimate) gives mortality rate ratios.
clean.nb <- glm.nb(
  deaths_count ~ offset(log(total_pop1)) + scale(age65_plus / total_pop1) + BWQS_index + fitted(me.fit),
  data = spdat@data
)
tidy(clean.nb) %>% mutate(estimate_exp = exp(estimate))
## # A tibble: 4 x 6
##   term                        estimate std.error statistic  p.value estimate_exp
##   <chr>                          <dbl>     <dbl>     <dbl>    <dbl>        <dbl>
## 1 (Intercept)                  -7.23      0.0962   -75.2   0.           0.000726
## 2 scale(age65_plus/total_pop…   0.0301    0.0398     0.757 4.49e- 1     1.03    
## 3 BWQS_index                    0.189     0.0196     9.66  4.29e-22     1.21    
## 4 fitted(me.fit)               -1.04      0.402     -2.59  9.55e- 3     0.353
# Profile-likelihood confidence intervals, exponentiated to the rate-ratio
# scale. across() replaces the superseded mutate_at()/vars() idiom.
as_tibble(confint(clean.nb), rownames = "vars") %>% mutate(across(2:3, exp))
## Waiting for profiling to be done...
## # A tibble: 4 x 3
##   vars                          `2.5 %` `97.5 %`
##   <chr>                           <dbl>    <dbl>
## 1 (Intercept)                  0.000600 0.000882
## 2 scale(age65_plus/total_pop1) 0.954    1.11    
## 3 BWQS_index                   1.16     1.26    
## 4 fitted(me.fit)               0.164    0.760
# Re-test residual spatial autocorrelation in the spatially filtered model;
# Moran's I is no longer significant at alpha = 0.05 (p = 0.063).
lm.morantest(clean.nb, listw = ny.wt4, resfun = residuals)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: glm.nb(formula = deaths_count ~ offset(log(total_pop1)) +
## scale(age65_plus/total_pop1) + BWQS_index + fitted(me.fit), data =
## spdat@data, init.theta = 7.102679753, link = log)
## weights: ny.wt4
## 
## Moran I statistic standard deviate = 1.5268, p-value = 0.0634
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.072171696     -0.002826641      0.002412754
#### Appendix
# Record the run timestamp and the full package environment for reproducibility
Sys.time()
## [1] "2020-06-21 20:12:47 EDT"
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.2 (2019-12-12)
##  os       Ubuntu 18.04.3 LTS          
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2020-06-21                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package        * version    date       lib
##  abind            1.4-5      2016-07-21 [3]
##  askpass          1.1        2019-01-13 [3]
##  assertthat       0.2.1      2019-03-21 [3]
##  backports        1.1.8      2020-06-17 [1]
##  BBmisc           1.11       2017-03-10 [3]
##  boot             1.3-22     2019-04-26 [3]
##  broom          * 0.5.6      2020-04-20 [1]
##  callr            3.4.3      2020-03-28 [1]
##  car              3.0-2      2018-08-23 [3]
##  carData          3.0-2      2018-09-30 [3]
##  cellranger       1.1.0      2016-07-27 [3]
##  checkmate        2.0.0      2020-02-06 [1]
##  class            7.3-15     2019-01-01 [4]
##  classInt         0.4-3      2020-04-07 [1]
##  cli              2.0.2      2020-02-28 [1]
##  coda             0.19-3     2019-07-05 [3]
##  codetools        0.2-16     2018-12-24 [3]
##  colorspace       1.4-1      2019-03-18 [3]
##  cowplot          1.0.0      2019-07-11 [1]
##  crayon           1.3.4      2017-09-16 [3]
##  curl             3.3        2019-01-10 [3]
##  data.table       1.12.8     2019-12-09 [3]
##  DBI              1.1.0      2019-12-15 [1]
##  deldir           0.1-16     2019-01-04 [3]
##  digest           0.6.25     2020-02-23 [1]
##  dplyr          * 1.0.0      2020-05-29 [1]
##  drc            * 3.0-1      2016-08-30 [1]
##  e1071            1.7-3      2019-11-26 [1]
##  egg            * 0.4.2      2018-11-03 [3]
##  ellipsis         0.3.1      2020-05-15 [1]
##  evaluate         0.14       2019-05-28 [1]
##  expm             0.999-4    2019-03-21 [3]
##  fansi            0.4.1      2020-01-08 [1]
##  farver           2.0.3      2020-01-16 [1]
##  fastmatch        1.1-0      2017-01-28 [3]
##  forcats        * 0.4.0      2019-02-17 [3]
##  foreign          0.8-71     2018-07-20 [3]
##  fst              0.9.2      2020-04-01 [1]
##  gdata            2.18.0     2017-06-06 [3]
##  generics         0.0.2      2018-11-29 [3]
##  ggExtra        * 0.8        2018-04-04 [3]
##  ggplot2        * 3.3.2      2020-06-19 [1]
##  ggpubr         * 0.3.0      2020-05-04 [1]
##  ggridges       * 0.5.1      2018-09-27 [3]
##  ggsignif         0.6.0      2019-08-08 [1]
##  glue             1.4.1      2020-05-13 [1]
##  gmodels          2.18.1     2018-06-25 [3]
##  gridExtra      * 2.3        2017-09-09 [3]
##  gtable           0.3.0      2019-03-25 [3]
##  gtools           3.8.1      2018-06-26 [3]
##  haven            2.1.0      2019-02-19 [3]
##  here           * 0.1        2017-05-28 [3]
##  highr            0.8        2019-03-20 [3]
##  hms              0.5.3      2020-01-08 [1]
##  htmltools        0.5.0      2020-06-16 [1]
##  httpuv           1.5.1      2019-04-05 [3]
##  httr             1.4.0      2018-12-11 [3]
##  inline           0.3.15     2018-05-18 [3]
##  jsonlite         1.6.1      2020-02-02 [1]
##  Just.universal * 0.0.0.9000 2020-06-21 [1]
##  KernSmooth       2.23-16    2019-10-15 [4]
##  knitr            1.28       2020-02-06 [1]
##  labeling         0.3        2014-08-23 [3]
##  later            0.8.0      2019-02-11 [3]
##  lattice          0.20-38    2018-11-04 [4]
##  LearnBayes       2.15.1     2018-03-18 [3]
##  lifecycle        0.2.0      2020-03-06 [1]
##  loo              2.1.0      2019-03-13 [3]
##  lubridate      * 1.7.9      2020-06-08 [1]
##  magrittr         1.5        2014-11-22 [3]
##  maptools         0.9-5      2019-02-18 [3]
##  MASS           * 7.3-51.4   2019-04-26 [3]
##  Matrix         * 1.2-17     2019-03-22 [3]
##  matrixStats    * 0.54.0     2018-07-23 [3]
##  mgcv           * 1.8-28     2019-03-21 [3]
##  mime             0.9        2020-02-04 [1]
##  miniUI           0.1.1.1    2018-05-18 [3]
##  modelr           0.1.4      2019-02-18 [3]
##  MTA.turnstile  * 0.0.0.9000 2020-06-21 [1]
##  multcomp         1.4-10     2019-03-05 [3]
##  munsell          0.5.0      2018-06-12 [3]
##  mvtnorm          1.0-10     2019-03-05 [3]
##  nlme           * 3.1-140    2019-05-12 [3]
##  openxlsx         4.1.0      2018-05-26 [3]
##  ParamHelpers     1.14       2020-03-24 [1]
##  pdftools       * 2.3.1      2020-05-22 [1]
##  pillar           1.4.4      2020-05-05 [1]
##  pkgbuild         1.0.8      2020-05-07 [1]
##  pkgconfig        2.0.3      2019-09-22 [1]
##  plotrix          3.7-5      2019-04-07 [3]
##  plyr             1.8.5      2019-12-10 [3]
##  prettyunits      1.1.1      2020-01-24 [1]
##  processx         3.4.2      2020-02-09 [1]
##  promises         1.0.1      2018-04-13 [3]
##  ps               1.3.3      2020-05-08 [1]
##  purrr          * 0.3.2      2019-03-15 [3]
##  qpdf             1.1        2019-03-07 [1]
##  R6               2.4.1      2019-11-12 [1]
##  RANN             2.6.1      2019-01-08 [3]
##  rappdirs         0.3.1      2016-03-28 [3]
##  raster         * 2.9-5      2019-05-14 [3]
##  RColorBrewer     1.1-2      2014-12-07 [3]
##  Rcpp             1.0.4.6    2020-04-09 [1]
##  readr          * 1.3.1      2018-12-21 [3]
##  readxl           1.3.1      2019-03-13 [3]
##  rgdal            1.4-3      2019-03-14 [3]
##  rio              0.5.16     2018-11-26 [3]
##  rlang            0.4.6      2020-05-02 [1]
##  rmarkdown        2.3        2020-06-18 [1]
##  rprojroot        1.3-2      2018-01-03 [3]
##  rstan          * 2.18.2     2018-11-07 [3]
##  rstatix          0.6.0      2020-06-18 [1]
##  rstudioapi       0.11       2020-02-07 [1]
##  rvest            0.3.4      2019-05-15 [3]
##  sandwich         2.5-1      2019-04-06 [3]
##  scales         * 1.1.1      2020-05-11 [1]
##  sessioninfo      1.1.1      2018-11-05 [3]
##  sf             * 0.9-4      2020-06-13 [1]
##  shiny            1.3.2      2019-04-22 [3]
##  sp             * 1.4-2      2020-05-20 [1]
##  spatialreg     * 1.1-5      2019-12-01 [1]
##  spData         * 0.3.0      2019-01-07 [3]
##  spdep          * 1.1-2      2019-04-05 [3]
##  StanHeaders    * 2.18.1     2019-01-28 [3]
##  stringi          1.4.6      2020-02-17 [3]
##  stringr        * 1.4.0      2019-02-10 [3]
##  survival         2.44-1.1   2019-04-01 [3]
##  TH.data          1.0-10     2019-01-21 [3]
##  tibble         * 3.0.1      2020-04-20 [1]
##  tidycensus     * 0.9.6      2020-01-25 [1]
##  tidyr          * 1.1.0      2020-05-20 [1]
##  tidyselect       1.1.0      2020-05-11 [1]
##  tidyverse      * 1.2.1      2017-11-14 [3]
##  tigris           0.7        2018-04-14 [3]
##  units            0.6-7      2020-06-13 [1]
##  utf8             1.1.4      2018-05-24 [3]
##  uuid             0.1-2      2015-07-28 [3]
##  vctrs            0.3.1      2020-06-05 [1]
##  withr            2.2.0      2020-04-20 [1]
##  xfun             0.15       2020-06-21 [1]
##  xgboost          1.1.1.1    2020-06-14 [1]
##  xml2             1.2.0      2018-01-24 [3]
##  xtable           1.8-4      2019-04-21 [3]
##  yaml             2.2.1      2020-02-01 [1]
##  zip              2.0.2      2019-05-13 [3]
##  zoo              1.8-8      2020-05-02 [1]
##  source                                 
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.5.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  Github (justlab/Just_universal@78812f5)
##  CRAN (R 3.6.1)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  Github (justlab/MTA_turnstile@6c8bd76) 
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.0)                         
##  CRAN (R 3.6.2)                         
## 
## [1] /home/carrid08/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library